import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter1d
import seaborn as sns
import matplotlib as mpl
from matplotlib.lines import Line2D
import yaml
from scipy import stats
import sys
sys.path.append('../resources/')
from ImagingUtilities import *
import warnings
warnings.filterwarnings('ignore')
from scipy.optimize import curve_fit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_log_error, r2_score
# Load the shared plotting configuration from YAML.
# NOTE(review): yaml.full_load resolves more tags than yaml.safe_load; fine for
# a trusted local file, but safe_load is preferable if only plain data is stored.
with open("../data/resources/rcParams.yaml") as f:
    rcParamsDict = yaml.full_load(f)

# Echo every entry under "rcParams" and apply it to matplotlib.
for k, rc_value in rcParamsDict["rcParams"].items():
    print(f"{k} {rc_value}")
    plt.rcParams[k] = rc_value

# Echo the remaining top-level entries of the configuration file.
for k1 in set(rcParamsDict) - {"rcParams"}:
    print(f"{k1} {rcParamsDict[k1]}")
figure.dpi 80 savefig.dpi 500 figure.figsize [10, 10] axes.facecolor None figure.facecolor None dotSize 20
# Fixed hex colour for each cell line; passed as `palette=` to the seaborn calls
# that use `hue='line'` and indexed directly when drawing per-line curves.
line_palette = {
'CTL01A': '#DBB807',
'CTL08A': '#0FB248',
'CTL04E': '#FF0054',
'CTL02A': '#7B00FF',
'H9': '#72190E',
'H1': '#994F88',
'CTL05A': '#1965B0',
'CTL07C': '#437DBF',
'CTL06F': '#CAE0AB',
'CTL09A': '#FFFF00',
'KTD8.2': '#E65518',
'UCSFi001-A': '#7BAFDE'}
# Load the per-image quantifications and the additional time points; both files
# use their first column as the row index.
total_df = pd.read_csv('../../iPSC_imaging/quantifications/quantification.csv', index_col=0)
add_tp = pd.read_csv('../../iPSC_imaging/quantifications/quantification_addTP.csv', index_col=0)

# Drop rows whose column values are fully duplicated. The previous expression
# computed this selection but discarded it, so no row was ever removed.
total_df = total_df[~total_df.duplicated()]
total_df = pd.concat([total_df, add_tp])

# Identity mapping of the line names, with 'CHD2WT' and 'CHD8WT' remapped to
# 'UCSFi001-A' and 'H9' respectively.
donor_map_names = {name: name for name in total_df['line'].unique()}
donor_map_names['CHD2WT'] = 'UCSFi001-A'
donor_map_names['CHD8WT'] = 'H9'
total_df['line'] = total_df['line'].map(donor_map_names)
total_df.shape
(2776, 16)
# Conversion factor from pixel counts to area, stored on every row.
# NOTE(review): if 1.38 is the pixel edge length in microns, one pixel covers
# 1.38 ** 2 square microns and the factor below should be squared — confirm
# against the acquisition metadata before changing it.
total_df['pixel_size'] = 1.38
total_df['Area (microm2)'] = total_df.total_area * total_df['pixel_size']
# 1 mm^2 equals 1e6 square microns; the former divisor of 1000 produced values
# one thousand times too large for the "mm2" column.
total_df['Area (mm2)'] = total_df['Area (microm2)'] / 1e6
total_df['line'].unique()
array(['H1', 'CTL04E', 'CTL02A', 'CTL05A', 'H9', 'KTD8.2', 'CTL09A',
'CTL06F', 'CTL08A', 'CTL07C', 'UCSFi001-A', 'CTL01A'], dtype=object)
# Sorted array of every distinct acquisition time-point label.
all_tp = np.sort(total_df.time_point.unique())
all_tp
array(['01_11_23_t18', '02_11_23_t18', '02_11_23_t9', '03_11_23_t18',
'03_11_23_t9', '04_11_23_t10', '04_11_23_t18', '05_11_23_t18',
'05_11_23_t9', '06_11_23_t18', '06_11_23_t9', '07_11_23_t18',
'07_11_23_t9', '08_11_23_t18', '08_11_23_t9', '09_11_23_t20',
'09_11_23_t9', '10_11_23_t18', '10_11_23_t9', '11_11_23_t18',
'11_11_23_t9', '12_11_23_t13', '12_11_23_t18', '13_11_23_t18',
'13_11_23_t9', '14_11_23_t17', '14_11_23_t9', '15_11_23_t17',
'15_11_23_t9', '16_11_23_t10', '16_11_23_t18', '17_11_23_t17',
'17_11_23_t9', '18_11_23_t10', '18_11_23_t18', '19_11_23_t10',
'19_11_23_t18', '20_11_23_t18', '20_11_23_t9', '21_11_23_t17',
'21_11_23_t9', '22_11_23_t10', '31_10_23_t18', '31_10_23_t9'],
dtype=object)
Here I'm adding a few quantifications:
1. norm_factor: the normalization factor, corresponding to the mean percentage area across all lines at each time point post split (called split_time)
2. perc_area_norm: the normalized percentage area, corresponding to the percentage area divided by the normalization factor (1.)
3. mean_area_tp: the mean area of each line at each time point post split (called split_time)
4. area_error: the percentage "error" of the total area computed with respect to the mean of that line at that time point
5. std: the standard deviation of each area with respect to (3.)
6. cv: the coefficient of variation, corresponding to the ratio between the standard deviation and the mean
total_df['line_split'] = total_df['line'].astype('str') + '_' + total_df['split_time'].astype('str')
# Mean of every numeric column per time point post split. The column name used
# to be passed positionally to ``mean``, where it is taken as the
# ``numeric_only`` flag rather than as a column selector; the flag is now explicit.
mean_df_time_point = total_df.groupby(['split_time']).mean(numeric_only=True)
mean_df_time_point_dict = mean_df_time_point['perc_area'].to_dict()
mean_df_time_point_dict

# Mean area per (line, time point post split), keyed by the same
# "<line>_<split_time>" label that is stored in total_df['line_split'].
area_df_time_point = total_df.groupby(['line', 'split_time']).mean(numeric_only=True).reset_index()
area_df_time_point['line_split'] = area_df_time_point['line'].astype('str') + '_' + area_df_time_point['split_time'].astype('str')
area_df_time_point = dict(zip(area_df_time_point.line_split, area_df_time_point['Area (microm2)']))
area_df_time_point

# Normalise the percentage area by the all-lines mean at the same time point.
total_df['norm_factor'] = total_df.split_time.map(mean_df_time_point_dict)
total_df['perc_area_norm'] = total_df['perc_area'] / total_df['norm_factor']

total_df['mean_area_tp'] = total_df.line_split.map(area_df_time_point)
# NOTE(review): the deviation is divided by the row's own area, not by the
# group mean — confirm this is the intended definition of the relative error.
total_df['area_error'] = (total_df['mean_area_tp'] - total_df['Area (microm2)']) / total_df['Area (microm2)']
# NOTE(review): this is a per-row |area - mean| scaled by sqrt of the total
# number of rows in the table, not a per-group standard deviation — confirm.
total_df['std'] = np.sqrt((total_df['Area (microm2)'] - total_df['mean_area_tp'])**2 / len(total_df))
total_df['cv'] = total_df['std'] / total_df['mean_area_tp']

# "<line>_<passage number>" label used to separate passages in later figures.
total_df['line_n_split'] = total_df['line'] + '_' + total_df['n_split'].astype('str')
sns.kdeplot(total_df['cv'])
<Axes: xlabel='cv', ylabel='Density'>
# Density of the relative area error with a vertical reference line at x = 15.
ax = sns.kdeplot(total_df['area_error'])
ax.axvline(x=15)
<matplotlib.lines.Line2D at 0x7f6c32443160>
# Same density restricted to the [-1, 30] range, with a reference line at x = 10.
ax = sns.kdeplot(total_df['area_error'])
ax.set_xlim(left=-1, right=30)
ax.axvline(x=10)
<matplotlib.lines.Line2D at 0x7f6c3250f340>
# Percentage area against time post split for every image, coloured by line.
fig, ax = plt.subplots(figsize=(20, 10))
sns.scatterplot(
    data=total_df,
    x='split_time',
    y='perc_area',
    hue='line',
    palette=line_palette,
    ax=ax,
)
<Axes: xlabel='split_time', ylabel='perc_area'>
# Discard images taken less than 25 h post split that already cover more than
# 10 % of the field.
early_and_dense = (total_df['split_time'] < 25) & (total_df['perc_area'] > 10)
total_df = total_df[~early_and_dense]

# NOTE(review): area_error can be negative, in which case log10 yields NaN and
# those rows do not contribute to this density.
ax = sns.kdeplot(np.log10(total_df['area_error']))

# Keep only the rows whose relative area error is below 5.
within_error = total_df['area_error'] < 5
total_df = total_df[within_error]
total_df.shape
(2170, 25)
# Row label holding the largest n_split of each line.
idx_max = total_df.groupby('line')['n_split'].idxmax()
# Drop those labels; every row sharing a dropped label is removed as well.
filtered_df = total_df.drop(index=idx_max)
filtered_df
| total_area | perc_area | mean_area_per_colony | n_colonies | time_point | confluency/generation | hour | month | day | line | ... | split_time | pixel_size | Area (microm2) | Area (mm2) | line_split | mean_area_tp | area_error | std | cv | line_n_split | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CTL04E_3_14_11_1.czi | 2532723.0 | 26.606326 | 1.266362e+06 | 2.0 | 14_11_23_t9 | generation | 9 | 11 | 14 | CTL04E | ... | 159.0 | 1.38 | 3495157.74 | 3495.15774 | CTL04E_159.0 | 3.497834e+06 | 0.000766 | 50.791607 | 0.000015 | CTL04E_3 |
| CTL02A_3_14_11_3.czi | 1051145.0 | 11.042308 | 1.313931e+05 | 8.0 | 14_11_23_t9 | generation | 9 | 11 | 14 | CTL02A | ... | 44.0 | 1.38 | 1450580.10 | 1450.58010 | CTL02A_44.0 | 1.548576e+06 | 0.067556 | 1859.928320 | 0.001201 | CTL02A_3 |
| CHD8WT_3_14_11_1.czi | 1655866.0 | 17.394919 | 1.655866e+05 | 10.0 | 14_11_23_t9 | generation | 9 | 11 | 14 | H9 | ... | 44.0 | 1.38 | 2285095.08 | 2285.09508 | H9_44.0 | 9.615470e+05 | -0.579209 | 25120.598728 | 0.026125 | H9_3 |
| KTD8.2_3_14_11_4.czi | 862216.0 | 9.057603 | 2.874053e+05 | 3.0 | 14_11_23_t9 | generation | 9 | 11 | 14 | KTD8.2 | ... | 72.0 | 1.38 | 1189858.08 | 1189.85808 | KTD8.2_72.0 | 7.105711e+05 | -0.402810 | 9096.742330 | 0.012802 | KTD8.2_3 |
| CTL09A_4_14_11_5.czi | 165602.0 | 1.739654 | 2.760033e+04 | 6.0 | 14_11_23_t9 | generation | 9 | 11 | 14 | CTL09A | ... | 24.0 | 1.38 | 228530.76 | 228.53076 | CTL09A_24.0 | 2.885001e+05 | 0.262413 | 1138.202417 | 0.003945 | CTL09A_4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| CTL08A_5_22_11_2.czi | 946317.0 | 9.941087 | 6.759407e+04 | 14.0 | 22_11_23_t10 | generation | 10 | 11 | 22 | CTL08A | ... | 17.0 | 1.38 | 1305917.46 | 1305.91746 | CTL08A_17.0 | 1.797598e+06 | 0.376502 | 9331.962580 | 0.005191 | CTL08A_5 |
| CTL08A_5_22_11_3.czi | 638843.0 | 6.711064 | 1.064738e+05 | 6.0 | 22_11_23_t10 | generation | 10 | 11 | 22 | CTL08A | ... | 17.0 | 1.38 | 881603.34 | 881.60334 | CTL08A_17.0 | 1.797598e+06 | 1.039010 | 17385.334447 | 0.009671 | CTL08A_5 |
| CTL01A_5_22_11_4.czi | 834622.0 | 8.767728 | 5.961586e+04 | 14.0 | 22_11_23_t10 | generation | 10 | 11 | 22 | CTL01A | ... | 17.0 | 1.38 | 1151778.36 | 1151.77836 | CTL01A_17.0 | 1.359752e+06 | 0.180567 | 3947.276901 | 0.002903 | CTL01A_5 |
| CTL08A_5_22_11_4.czi | 857634.0 | 9.009469 | 1.072042e+05 | 8.0 | 22_11_23_t10 | generation | 10 | 11 | 22 | CTL08A | ... | 17.0 | 1.38 | 1183534.92 | 1183.53492 | CTL08A_17.0 | 1.797598e+06 | 0.518838 | 11654.751425 | 0.006484 | CTL08A_5 |
| CHD8WT_5_22_11_1.czi | 245495.0 | 2.578932 | 1.227475e+05 | 2.0 | 22_11_23_t10 | generation | 10 | 11 | 22 | H9 | ... | 17.0 | 1.38 | 338783.10 | 338.78310 | H9_17.0 | 1.725624e+06 | 4.093595 | 26321.881011 | 0.015254 | H9_5 |
2151 rows × 25 columns
# log10 of the area; the small offset keeps zero-area rows finite.
total_df['logArea'] = np.log10(total_df['Area (microm2)'] + 1e-6)
Here we fitted a polynomial regression function of order 3 (exploratory to look at what type of shapes we expect from the curves):
# Alphabetical facet order shared by the per-line figures.
order = sorted(total_df.line.unique())
sns.set_theme(style="ticks")

# One facet per line, four per row, each drawn in its line colour.
grid = sns.FacetGrid(
    total_df.sort_values(by='split_time'),
    col="line",
    col_wrap=4,
    col_order=order,
    hue='line',
    palette=line_palette,
    height=5,
)

# Scatter plus a third-order polynomial fit in every facet.
grid.map(sns.regplot, "split_time", "Area (mm2)", order=3)
grid.set_axis_labels("Time point post split", "Area (mm2)")

# Adjust the arrangement of the plots
grid.fig.tight_layout(w_pad=1)
Without fitting any regression (the line goes through the mean and the highlighted band around it is seaborn's default error band, i.e. the 95% confidence interval of the mean):
sns.set_theme(style="ticks")

# One facet per line, four per row, each drawn in its line colour.
grid = sns.FacetGrid(total_df.sort_values(by='split_time'), col="line", hue='line', palette=line_palette,
                     col_wrap=4, height=5, col_order=order)

# Mean trajectory of the area over time post split, with seaborn's default error band.
grid.map(sns.lineplot, "split_time", "Area (mm2)", markers=True)

# The plotted column is "Area (mm2)"; the previous label claimed pixels.
grid.set_axis_labels("Time point post split", "Area (mm2)")

# Adjust the arrangement of the plots
grid.fig.tight_layout(w_pad=1)
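We use the area, then average all the FOV for a specific time point in each line. The plot is composed of:
- the individual FOV areas (scatter points),
- the mean area at each time point (dashed black line),
- the smoothed mean (blue line, using gaussian_filter1d from scipy.ndimage),
- the smoothed mean ± half the standard deviation (red band).
fig, ax = plt.subplots(4, 3, figsize=(30, 21), gridspec_kw={'hspace': 0.7})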
ax = ax.flatten()
for ax_index, line in enumerate(total_df.line.unique()):
    panel = ax[ax_index]
    sub = total_df[(total_df.line == line) & (total_df.n_split != 'day')].sort_values(by='datetime')

    # Mean and standard deviation of the area at each time point post split.
    area_by_time = sub.groupby('split_time')['Area (mm2)']
    mean_st = area_by_time.mean()
    std_st = area_by_time.std()
    y_pos = mean_st.index

    # Raw points use the same column (and unit) as the mean curve. They were
    # previously taken from 'Area (microm2)', which put them on a different
    # scale from the curve and outside the y-limits set below.
    ydata = sub['Area (mm2)'].values
    xdata = sub.split_time.values.astype('int')
    farray = mean_st.values

    # Smoothing
    farray_smooth = gaussian_filter1d(farray, sigma=3)

    # Error band: mean +/- half the standard deviation, smoothed like the mean.
    # NOTE(review): time points with a single image have a NaN std, which the
    # filter spreads to neighbouring points of the band.
    half_std = (std_st / 2).values
    upper_err = gaussian_filter1d(farray + half_std, sigma=3)
    lower_err = gaussian_filter1d(farray - half_std, sigma=3)

    panel.scatter(xdata, ydata)
    panel.plot(y_pos, farray, '--', linewidth=0.7, color='k', alpha=0.45)
    panel.plot(y_pos, farray_smooth, color='#2374AB')
    panel.fill_between(y_pos, upper_err, lower_err, color='crimson', alpha=0.2)

    # Leave 25 % headroom above the highest mean value.
    panel.set_ylim(0, np.max(farray) * 1.25)

    # Rotate x-axis labels for better readability
    panel.tick_params(axis='x', rotation=90)

    # Set axis labels and title
    panel.set_title(f'Line {line}')
    panel.set_ylabel('Total area')
    panel.set_xlabel('Time point after split')

plt.tight_layout()  # Adjust layout to prevent overlapping
plt.show()
Here we plot the raw growth curves of each line, coloured by passage number (no regression is fitted in this figure):
sns.set_theme(style="ticks")

# One colour per passage number.
split_palette = {'1': '#264653', '2': '#2a9d8f', '3': '#8ab17d', '4': '#e9c46a', '5': '#f4a261', '6': '#e76f51'}

# Colour every "<line>_<passage>" label by the passage number that follows its
# last underscore.
line_split_palette = {
    label: split_palette[label.split('_')[-1]]
    for label in total_df.line_n_split.unique()
}

# Proxy artists for a single figure-level legend, one per passage colour.
custom_handles = [Line2D([0], [0], color=passage_color, lw=2) for passage_color in split_palette.values()]

grid = sns.FacetGrid(
    total_df.sort_values(by='split_time'),
    col="line",
    col_wrap=4,
    col_order=order,
    hue='line_n_split',
    palette=line_split_palette,
    height=5,
)
grid.map(sns.lineplot, "split_time", "Area (mm2)", markers=True)

# Replace the automatic legend with the passage-number legend.
grid.add_legend()
if grid._legend:
    grid._legend.remove()
legend = grid.fig.legend(custom_handles, split_palette.keys(), ncol=2, frameon=False, bbox_to_anchor=(1.2, 1), fontsize=25)
legend.set_title('Passage number', prop={'size': 30})

# Enlarge the facet titles and the tick labels on both axes.
for ax in grid.axes.flat:
    ax.set_title(ax.get_title(), fontsize=35)
    ax.tick_params(axis='x', labelsize=20)
    ax.tick_params(axis='y', labelsize=20)

grid.set_axis_labels("Hours after splitting", "Total area (mm2)", fontsize=25)
grid.fig.tight_layout(w_pad=1)
grid.fig.savefig('./figures/raw_GC_iPSC_dividedSplit.svg', dpi=300, bbox_inches='tight')
I need to filter out the combinations of "line" - "number of split" that do not have enough data points to fit an order 3 polynomial regression:
sns.set_theme(style="ticks")

# Keep only the line/passage combinations with more than 5 measurements.
boolean_sel = total_df.groupby(['line_n_split'])['split_time'].count() > 5
boolean_sel = boolean_sel[boolean_sel]
filtered_total = total_df[total_df.line_n_split.isin(boolean_sel.index)]

# The facets are line/passage labels, so the column order has to list those
# labels. The per-line `order` list used before matches none of them, which
# left every facet without data.
line_split_order = sorted(boolean_sel.index)
grid = sns.FacetGrid(filtered_total.sort_values(by='split_time'), col="line_n_split", hue='line', palette=line_palette,
                     col_wrap=4, height=5, col_order=line_split_order)

# Scatter plus a third-order polynomial fit in every facet.
grid.map(sns.regplot, "split_time", "Area (microm2)", order=3)

# Adjust the arrangement of the plots
grid.fig.tight_layout(w_pad=1)
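We use the area, then average all the FOV for a specific time point in each line at each split. The plot is composed of:
- the individual FOV areas (scatter points),
- the mean area at each time point (dashed black line),
- the smoothed mean (blue line, using gaussian_filter1d from scipy.ndimage),
- the smoothed mean ± half of the `stds` column returned by `preprocess` (red band).
fig, ax = plt.subplots(9,5, figsize = (5*10, 7*9), gridspec_kw={'hspace': 0.7})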
ax = ax.ravel()
ax_index = 0
summary_dfs_dict = {}
for l in order:
    sub = total_df[total_df.line == l].sort_values(by='datetime')
    sub = sub[sub.n_split != 'day']
    splits = sorted(sub.n_split.unique())
    for split in splits:
        subsub = sub[sub.n_split == split]

        # A passage needs more than 5 distinct time points to be plotted.
        if len(subsub.split_time.unique()) <= 5:
            print(f'Skipped split {subsub.n_split.values[0]} of line {subsub.line.values[0]}')
            continue

        ydata = subsub['Area (microm2)'].values
        xdata = subsub.split_time.values.astype('int')

        # `preprocess` is a helper defined outside this file; it is asked here
        # for the 'mean' output and the result is read through its
        # 'mean', 'stds' and 'split_time' columns.
        summary_df = preprocess(subsub, original_v='Area (microm2)', final_output='mean')
        summary_dfs_dict[f'{l}_split_{split}'] = summary_df
        farray = np.array(summary_df['mean'])
        y_pos = summary_df.split_time.values

        # Smoothing
        farray_smooth = gaussian_filter1d(farray, sigma=3)

        # Error band: mean +/- half of the 'stds' column, smoothed like the mean.
        half_std = summary_df['stds'] / 2
        upper_err = gaussian_filter1d(farray + half_std, sigma=3)
        lower_err = gaussian_filter1d(farray - half_std, sigma=3)

        panel = ax[ax_index]
        panel.scatter(xdata, ydata)
        panel.plot(y_pos, farray, '--', linewidth=0.7, color='k', alpha=0.45)
        panel.plot(y_pos, farray_smooth, color='#2374AB')
        panel.fill_between(y_pos, upper_err, lower_err, color='crimson', alpha=0.2)

        # Leave 25 % headroom above the highest mean value.
        peak = np.max(farray)
        panel.set_ylim(0, peak + (peak * 25) / 100)
        panel.xaxis.set_tick_params(rotation=90)
        panel.set_title(f'{l}_split_{split}')
        panel.set_ylabel('Total area')
        panel.set_xlabel('Time point')
        ax_index += 1
plt.show()
Skipped split 3 of line CTL01A Skipped split 1 of line CTL04E Skipped split 1 of line CTL09A Skipped split 1 of line UCSFi001-A
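We use the logarithm of the area, then sum all the FOV for a specific time point in each line at each split. The plot is composed of:
- the summed log area at each time point (dashed black line),
- the smoothed sum (blue line, using gaussian_filter1d from scipy.ndimage),
- the smoothed sum ± half of the `stds` column returned by `preprocess` (red band).
fig, ax = plt.subplots(10,5, figsize = (5*10, 7*9), gridspec_kw={'hspace': 0.7})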
ax = ax.ravel()
ax_index = 0
for l in order:
    sub = total_df[total_df.line == l].sort_values(by='datetime')
    sub = sub[sub.n_split != 'day']
    splits = sorted(sub.n_split.unique())
    for split in splits:
        subsub = sub[sub.n_split == split]

        # `preprocess` is a helper defined outside this file; it is asked here
        # for the 'area_sum' output of the log-area column and the result is
        # read through its 'area_sum', 'stds' and 'datetime' columns.
        summary_df = preprocess(subsub, original_v='logArea', final_output='area_sum')
        farray = np.array(summary_df['area_sum'])
        y_pos = summary_df.datetime.values

        # Smoothing
        farray_smooth = gaussian_filter1d(farray, sigma=1)

        # Error band: value +/- half of the 'stds' column, smoothed the same way.
        half_std = summary_df['stds'] / 2
        upper_err = gaussian_filter1d(farray + half_std, sigma=1)
        lower_err = gaussian_filter1d(farray - half_std, sigma=1)

        panel = ax[ax_index]
        panel.plot(y_pos, farray, '--', linewidth=0.7, color='k', alpha=0.45)
        panel.plot(y_pos, farray_smooth, color='#2374AB')
        panel.fill_between(y_pos, upper_err, lower_err, color='crimson', alpha=0.2)

        # Leave 25 % headroom above the highest value.
        peak = np.max(farray)
        panel.set_ylim(0, peak + (peak * 25) / 100)
        panel.xaxis.set_tick_params(rotation=90)
        panel.set_title(f'{l}_split_{split}')
        panel.set_ylabel('Log total area')
        panel.set_xlabel('Time point')
        ax_index += 1
plt.show()
lines = total_df.line.unique()
We use the total area and then average all the FOV for a specific time point in each line at each split. Then we smooth it with gaussian_filter1d and compute the diff divided by the time step, corresponding to the first discrete derivative.
# Per line/passage table holding the smoothed mean area and its derivative.
discrete_deriv_curves = {}
fig, ax = plt.subplots(10, 5, figsize=(50, 63), gridspec_kw={'hspace': 0.7})
ax = ax.ravel()
ax_index = 0
for l in total_df.line.unique():
    sub = total_df[total_df.line == l].sort_values(by='datetime')
    sub = sub[sub.n_split != 'day']
    for split in sub.n_split.unique():
        subsub = sub[sub.n_split == split]

        # `preprocess` is a helper defined outside this file; it is asked here
        # for the 'mean' output and the result is read through its
        # 'mean' and 'split_time' columns.
        summary_df = preprocess(subsub, original_v='Area (microm2)', final_output='mean')
        summary_df = summary_df.sort_values('split_time')

        # Smooth the mean, then take the finite difference over time; the
        # first row of the derivative is NaN by construction.
        summary_df['smoothed'] = gaussian_filter1d(summary_df['mean'], 3)
        summary_df['derivative'] = summary_df['smoothed'].diff() / summary_df['split_time'].diff()
        discrete_deriv_curves[f'{l}_split_{split}'] = summary_df

        farray = np.array(summary_df['derivative'])
        y_pos = summary_df.split_time.values

        panel = ax[ax_index]
        panel.errorbar(y_pos, farray, marker='o')
        panel.xaxis.set_tick_params(rotation=90)
        panel.set_title(f'{l}_split_{split}')
        panel.set_ylabel('Discrete derivative')
        panel.set_xlabel('Time point')
        ax_index += 1
plt.show()
We collected the results for each line and each split, and we can combine them into a single result for each line, using the splits as replicates.
# Stack the per line/passage tables; the dictionary key ends up in 'level_0'.
deriv_df = pd.concat(discrete_deriv_curves.values(), keys=discrete_deriv_curves.keys()).reset_index()
# Keys look like "<line>_split_<passage>".
deriv_df['line'] = deriv_df['level_0'].apply(lambda x: x.split('_')[0])
deriv_df['split'] = deriv_df['level_0'].apply(lambda x: x.split('_')[-1])

fig, ax = plt.subplots(4, 3, figsize=(5*4, 7*3), gridspec_kw={'hspace': 0.7})
ax = ax.flatten()
ax_index = 0
for line in deriv_df.line.unique():
    # The first derivative of every passage is NaN (it comes from diff()), and
    # gaussian_filter1d spreads a NaN over its whole kernel window, so those
    # rows are dropped before smoothing. copy() makes the column assignment
    # target an independent frame instead of a slice of deriv_df.
    # NOTE(review): the smoothing still runs over all passages pooled and
    # sorted by time; smooth per passage if that is what is intended.
    sub = (
        deriv_df[deriv_df.line == line]
        .dropna(subset=['derivative'])
        .sort_values('split_time')
        .copy()
    )
    sub['smoothed'] = gaussian_filter1d(sub['derivative'], sigma=3)
    sns.lineplot(data=sub, y='smoothed', x='split_time', hue='split', ax=ax[ax_index], errorbar='sd', markers=True, palette=split_palette)
    ax[ax_index].xaxis.set_tick_params(rotation=90)
    ax[ax_index].set_title(f'{line}')
    ax[ax_index].set_ylabel('Discrete derivative')
    ax[ax_index].set_xlabel('Time point')
    ax_index += 1
fig, ax = plt.subplots(4, 3, figsize=(5*4, 7*3))
ax = ax.flatten()
ax_index = 0
for line in deriv_df.line.unique():
    # The first derivative of every passage is NaN (it comes from diff()), and
    # gaussian_filter1d spreads a NaN over its whole kernel window, so those
    # rows are dropped before smoothing. copy() makes the column assignment
    # target an independent frame instead of a slice of deriv_df.
    sub = (
        deriv_df[deriv_df.line == line]
        .dropna(subset=['derivative'])
        .sort_values('split_time')
        .copy()
    )
    sub['smoothed'] = gaussian_filter1d(sub['derivative'], sigma=3)
    # Passages are pooled: the line is the mean over passages at each time
    # point and the bars show the standard deviation.
    sns.lineplot(data=sub, y='smoothed', x='split_time', ax=ax[ax_index], errorbar='sd', markers=True, err_style='bars')
    ax[ax_index].xaxis.set_tick_params(rotation=90)
    ax[ax_index].set_title(f'{line}')
    ax[ax_index].set_ylabel('Discrete derivative')
    ax[ax_index].set_xlabel('Time point')
    ax_index += 1
plt.tight_layout()
We use the total area and then average all the FOV for a specific time point in each line at each split. Then we compute the cumulative sum of these means over time with the cumsum() function (in the per-line figure further below, the means are first smoothed with gaussian_filter1d).
fig, ax = plt.subplots(9, 5, figsize=(50, 63))
ax = ax.ravel()
ax_index = 0

# Alphabetically sorted "<line>_<passage>" labels.
order_line_n = sorted(total_df.line_n_split.unique())
for line_n in order_line_n:
    sub = total_df[total_df.line_n_split == line_n].sort_values('split_time')

    # Only passages with more than 5 distinct time points get a panel.
    if len(sub.split_time.unique()) <= 5:
        continue

    # Running total of the mean area over the ordered time points.
    y = sub.groupby('split_time')['Area (mm2)'].mean().cumsum().values
    x = sub.split_time.unique()

    panel = ax[ax_index]
    sns.lineplot(y=y, x=x, ax=panel, errorbar='sd', markers=True, err_style='bars')
    panel.xaxis.set_tick_params(rotation=90)
    panel.set_title(f'{line_n}')
    panel.set_ylabel('Cumulative of mean total area')
    panel.set_xlabel('Time point')
    ax_index += 1
plt.tight_layout()
# Draw, on one axes, the cumulative smoothed mean area of every line/passage,
# coloured by line, and collect the drawn Line2D objects in `all_lines`.
# NOTE(review): indentation was lost in this export, so it cannot be told from
# here whether the labels/handles/lc appends run once per passage (inner loop)
# or once per line (outer loop) — confirm against the original notebook.
sns.set(style="white", palette="Paired", color_codes=True)
fig, ax = plt.subplots(figsize=(7,5))
labels = []
lc = []
handles = []
all_lines = {}
# Copy of the table without the rows whose n_split is '1'; not used in this block.
total_df_no_first = total_df[total_df.n_split != '1'].copy()
for l in total_df.line.unique():
color = line_palette[l]
sub = total_df[total_df.line == l]
sub = sub.sort_values(by = 'datetime')
sub = sub[sub.n_split != 'day']
for split in sub.n_split.unique():
subsub = sub[sub.n_split == split]
# `preprocess` is a helper defined outside this file; it is asked here for the
# 'mean' output and the result is read through its 'mean' and 'split_time' columns.
summary_df = preprocess(subsub, original_v='Area (mm2)', final_output='mean')
summary_df = summary_df.sort_values('split_time')
# Smooth the mean (sigma = 1), then accumulate it over the ordered time points.
summary_df['smoothed'] = gaussian_filter1d(summary_df['mean'], 1)
summary_df['cumulative'] = summary_df['smoothed'].cumsum()
farray = np.array(summary_df['cumulative'])
y_pos = summary_df.split_time.values
line, = ax.plot(y_pos, farray, color = color, marker = '.')
ax.xaxis.set_tick_params(rotation=90)
# Keyed by "<line>_<passage>".
all_lines[f'{l}_{split}'] = line
labels.append(l)
handles.append(line)
lc.append(color)
plt.legend(handles, labels, bbox_to_anchor = (1,1))
<matplotlib.legend.Legend at 0x7f6c31d94310>
# Hand the collected curves to `highlight_growth_curves`, a helper defined
# outside this file (presumably from ImagingUtilities — verify), together with
# the array of line names.
lines = total_df.line.unique()
highlight_growth_curves(all_lines, xlabel = 'Hours from split', ylabel = 'Cumulative growth', lines = lines, fontsize = 20)
#plt.savefig('growth_curve_per_line.pdf', dpi = 300)
# 5 x 9 grid of panels, consumed one at a time through `ax_index`.
fig, ax = plt.subplots(nrows=5, ncols=9, figsize=(60, 40))
ax = ax.ravel()
ax_index = 0
# Fit results per "<line>_<passage>" label.
fitted_param = {}
def exp_model(t, a, b):
    """Evaluate the exponential growth curve ``a * exp(b * t)``.

    Args:
        t: Time value(s). A scalar, a NumPy array, or a plain sequence such
            as a list or tuple (converted element-wise).
        a: Amplitude, i.e. the value of the model at ``t == 0``.
        b: Exponential rate per unit of ``t``.

    Returns:
        The model evaluated at ``t``: a NumPy scalar for scalar input,
        otherwise an array-like of the same length as ``t``.
    """
    # np.multiply accepts plain sequences, where ``b * t`` would raise for a
    # float rate and a list of times.
    return a * np.exp(np.multiply(b, t))
# Fit a * exp(b * t) to the cumulative mean area of every line/passage with
# more than 5 distinct time points, record the fit and draw it in its own panel.
for line_n in order_line_n:
    # Every label gets an entry; it stays empty when the passage is skipped or
    # the fit fails.
    fitted_param[line_n] = {}
    sub = total_df[total_df.line_n_split == line_n].sort_values('split_time')
    if len(sub.split_time.unique()) <= 5:
        continue

    # Calculate the cumulative sum of the mean total area for each split_time
    y = sub.groupby('split_time')['Area (mm2)'].mean().cumsum().values
    x = np.array(sub.split_time.unique())
    hue = [line_n.split('_')[0]] * len(y)

    # Only the optimiser is guarded, so that an unrelated RuntimeError raised
    # while plotting is not reported as a failed fit.
    try:
        popt, pcov = curve_fit(exp_model, x, y, p0=(max(y), 0.1))
    except RuntimeError as e:
        print(f"Fitting failed for line_n {line_n}: {e}")
        continue

    a, b = popt
    # Generate fitted y values
    y_fitted = exp_model(x, a, b)

    fitted_param[line_n]['a'] = a
    fitted_param[line_n]['rate'] = b
    fitted_param[line_n]['mean_cum'] = y
    fitted_param[line_n]['split_time'] = x
    fitted_param[line_n]['y_fitted'] = y_fitted
    fitted_param[line_n]['MSLE'] = mean_squared_log_error(y, y_fitted)
    fitted_param[line_n]['r2'] = r2_score(y, y_fitted)

    panel = ax[ax_index]
    sns.lineplot(y=y_fitted, x=x, ax=panel, markers=True, hue=hue, palette=line_palette, linewidth=5, legend=None)
    sns.scatterplot(y=y, x=x, ax=panel, markers=True, hue=hue, palette=line_palette, s=150, legend=None)
    panel.xaxis.set_tick_params(rotation=90)
    panel.set_title(f'{line_n}', fontsize=40)
    panel.set_ylabel('Cumulative area (mm2)', fontsize=35)
    panel.set_xlabel('Time point', fontsize=35)
    # Resize the tick labels through tick_params. Re-setting the labels from
    # get_xticklabels()/get_yticklabels() pins their text before the layout is
    # final, so they can be blank or disagree with the final tick positions.
    panel.tick_params(axis='both', labelsize=30)
    ax_index += 1

plt.tight_layout()
plt.savefig('./figures/Fitted_cum_area_per_split.svg', dpi=300, bbox_inches='tight')
# Draw, on one axes, the fitted exponential of the cumulative mean area for
# every line/passage with more than 5 distinct time points, coloured by line,
# and collect the drawn Line2D objects in `all_lines`.
# NOTE(review): indentation was lost in this export, so it cannot be told from
# here whether the labels/handles/lc appends run per passage or per line —
# confirm against the original notebook.
# NOTE(review): unlike the per-panel fit above, curve_fit is not guarded here;
# a RuntimeError from a non-converging fit would stop the cell.
sns.set(style="white", palette="Paired", color_codes=True)
fig, ax = plt.subplots(figsize=(7,5))
labels = []
lc = []
handles = []
all_lines = {}
# Copy of the table without the rows whose n_split is '1'; not used in this block.
total_df_no_first = total_df[total_df.n_split != '1'].copy()
for l in total_df.line.unique():
color = line_palette[l]
sub = total_df[total_df.line == l]
sub = sub.sort_values(by = 'datetime')
sub = sub[sub.n_split != 'day']
for split in sub.n_split.unique():
subsub = sub[sub.n_split == split]
if len(subsub.split_time.unique()) > 5:
# `preprocess` is a helper defined outside this file; it is asked here for the
# 'mean' output and the result is read through its 'mean' and 'split_time' columns.
summary_df = preprocess(subsub, original_v='Area (mm2)', final_output='mean')
summary_df = summary_df.sort_values('split_time')
# Accumulate the (unsmoothed) mean over the ordered time points.
summary_df['cumulative'] = summary_df['mean'].cumsum()
farray = np.array(summary_df['cumulative'].values)
y_pos = summary_df.split_time.values
# Fit a * exp(b * t), starting from the largest cumulative value and a rate of 0.1.
popt, pcov = curve_fit(exp_model, y_pos, farray, p0=(max(farray), 0.1))
a, b = popt
y_fitted = exp_model(y_pos, a, b)
line, = ax.plot(y_pos, y_fitted, color = color, marker = '.')
ax.xaxis.set_tick_params(rotation=90)
# Keyed by "<line>_<passage>".
all_lines[f'{l}_{split}'] = line
labels.append(l)
handles.append(line)
lc.append(color)
plt.legend(handles, labels, bbox_to_anchor = (1,1))
<matplotlib.legend.Legend at 0x7f6c32173730>
# Hand the fitted curves to `highlight_growth_curves`, a helper defined outside
# this file (presumably from ImagingUtilities — verify), then save the current
# figure as SVG.
lines = total_df.line.unique()
highlight_growth_curves(all_lines, xlabel = 'Hours from split', ylabel = 'Cumulative area (mm2)', lines = lines, fontsize = 20)
plt.savefig('./figures/cumulative_growth_curve_per_line.svg', dpi = 300)
# One row per "<line>_<passage>" label: the transpose turns the outer
# dictionary keys into the row index.
fitted_df = pd.DataFrame.from_dict(fitted_param).T

# Split each row label into its line name and passage number.
fitted_df['Line'] = [label.split('_')[0] for label in fitted_df.index]
fitted_df['split'] = [label.split('_')[1] for label in fitted_df.index]

# Labels that were skipped, or whose fit failed, have no 'a' value.
fitted_df = fitted_df[fitted_df.a.notna()]

# Keep only the fits with a coefficient of determination above 0.9.
fitted_df_filtered = fitted_df[fitted_df['r2'] > 0.9]
fitted_df_filtered
| a | rate | mean_cum | split_time | y_fitted | MSLE | r2 | Line | split | |
|---|---|---|---|---|---|---|---|---|---|
| CTL01A_2 | 44.192008 | 0.032972 | [48.98585999999999, 195.08369999999996, 285.83... | [49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 120.0] | [222.33018031900514, 289.43715927487756, 474.6... | 0.388249 | 0.951651 | CTL01A | 2 |
| CTL01A_4 | 945.527198 | 0.032063 | [407.73135, 650.817798, 1470.892803, 2865.8591... | [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0] | [945.5271977458456, 1222.0016311780946, 2107.6... | 0.18731 | 0.936356 | CTL01A | 4 |
| CTL02A_1 | 411.679362 | 0.023629 | [405.32601, 534.56623, 880.29901, 1318.17853, ... | [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... | [1034.6010611189106, 1279.7641959460173, 1824.... | 0.233463 | 0.971479 | CTL02A | 1 |
| CTL02A_2 | 345.17392 | 0.043891 | [481.00286399999993, 682.5998189999999, 1748.9... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.0] | [666.7525349575958, 989.739028170901, 1911.821... | 0.038571 | 0.992569 | CTL02A | 2 |
| CTL02A_3 | 3436.282608 | 0.019792 | [627.73808, 992.027444, 1508.292068, 2111.0835... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [3436.282608164845, 3793.7273295904215, 5105.0... | 0.579326 | 0.962535 | CTL02A | 3 |
| CTL02A_5 | 235.308712 | 0.039375 | [248.92647, 348.61422, 667.6209999999999, 927.... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [235.30871162314125, 441.8201612979932, 605.40... | 0.018336 | 0.982764 | CTL02A | 5 |
| CTL04E_2 | 226.365471 | 0.02725 | [114.00317999999999, 419.92985999999996, 648.0... | [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... | [435.3502598087916, 556.3518577625465, 860.403... | 0.222511 | 0.980698 | CTL04E | 2 |
| CTL04E_3 | 539.373742 | 0.022668 | [807.9485999999999, 925.2285899999999, 1018.98... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... | [757.8076669846054, 929.3097891009512, 1305.65... | 0.064937 | 0.979539 | CTL04E | 3 |
| CTL04E_4 | 1709.286905 | 0.023495 | [463.8341, 808.1382349999999, 1096.15610299999... | [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... | [1709.2869051554621, 2062.744367831728, 3075.4... | 0.342903 | 0.97955 | CTL04E | 4 |
| CTL05A_1 | 293.207013 | 0.031328 | [296.48830799999996, 923.25726, 1722.042108, 2... | [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... | [994.9181087589576, 1318.9747614353005, 2110.1... | 0.164421 | 0.997846 | CTL05A | 1 |
| CTL05A_2 | 29.454331 | 0.040786 | [78.09385499999999, 193.287147, 241.072407, 38... | [9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0, 96.0... | [42.51742389389032, 78.39046649841391, 113.156... | 0.188402 | 0.998611 | CTL05A | 2 |
| CTL05A_3 | 735.796188 | 0.020412 | [544.2033449999999, 640.3876889999999, 671.980... | [0.0, 15.0, 23.0, 39.0, 47.0, 64.0, 72.0, 87.0... | [735.7961876719627, 999.3719254230257, 1176.64... | 0.164413 | 0.981179 | CTL05A | 3 |
| CTL06F_1 | 71.504348 | 0.038951 | [71.0148, 191.69752499999998, 376.873929, 754.... | [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.... | [71.50434825019778, 326.6395949886104, 463.783... | 0.035333 | 0.998462 | CTL06F | 1 |
| CTL06F_2 | 86.83855 | 0.046409 | [84.18644, 202.243508, 373.50316399999997, 681... | [0.0, 9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0,... | [86.8385498371545, 131.85924957029783, 264.509... | 0.065233 | 0.998126 | CTL06F | 2 |
| CTL06F_3 | 2179.790117 | 0.020238 | [317.817864, 537.075576, 993.100164, 1955.1224... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [2179.7901169835372, 2411.9067318461553, 3267.... | 0.534267 | 0.985096 | CTL06F | 3 |
| CTL06F_5 | 457.640381 | 0.031442 | [378.69431000000003, 785.843438, 1021.701998, ... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [457.64038083814654, 756.8410668189582, 973.29... | 0.007437 | 0.995491 | CTL06F | 5 |
| CTL07C_2 | 73.894511 | 0.03302 | [37.458996, 104.79609599999999, 185.987292, 28... | [9.0, 24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0... | [99.46594192154207, 163.22177467691895, 219.70... | 0.128169 | 0.9895 | CTL07C | 2 |
| CTL07C_3 | 685.286497 | 0.034412 | [913.3792199999999, 1250.2052039999999, 1652.8... | [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] | [685.2864972375706, 1148.2787682215853, 1565.1... | 0.013741 | 0.998142 | CTL07C | 3 |
| CTL07C_4 | 1522.709487 | 0.021389 | [182.26073999999997, 304.74484799999993, 570.8... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [1522.7094867743351, 1694.580661740262, 2335.5... | 0.829412 | 0.972545 | CTL07C | 4 |
| CTL07C_6 | 498.902314 | 0.040869 | [273.1388, 520.0346, 1032.9212599999998, 2528.... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [498.90231351445, 959.4102068353367, 1330.4505... | 0.139212 | 0.967463 | CTL07C | 6 |
| CTL08A_1 | 346.700733 | 0.03576 | [127.16147999999998, 431.94275999999996, 1240.... | [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] | [346.7007334499812, 1398.4393021645658, 1929.3... | 0.328524 | 0.977399 | CTL08A | 1 |
| CTL08A_2 | 231.031507 | 0.034296 | [62.86313999999999, 162.41026799999997, 252.89... | [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... | [231.03150707014152, 386.44678196779114, 526.1... | 0.307433 | 0.995233 | CTL08A | 2 |
| CTL08A_3 | 1195.731872 | 0.023814 | [140.72618999999997, 259.12742999999995, 598.6... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [1195.7318723513963, 1346.931356484199, 1925.2... | 0.872422 | 0.967654 | CTL08A | 3 |
| CTL09A_2 | 100.290935 | 0.036553 | [158.31083999999998, 305.1929799999999, 625.27... | [33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 12... | [335.05944132725455, 601.3324964971382, 805.58... | 0.143439 | 0.984661 | CTL09A | 2 |
| CTL09A_3 | 722.000766 | 0.031095 | [416.53340399999996, 845.522964, 1353.60639600... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... | [1151.079689056732, 1522.8110877598822, 2427.8... | 0.211774 | 0.977202 | CTL09A | 3 |
| CTL09A_4 | 1105.844842 | 0.026066 | [325.85663999999997, 564.9259079999999, 1003.4... | [9.0, 24.0, 32.0, 48.0, 56.0, 73.0, 81.0, 96.0... | [1398.2275206445206, 2067.206814931102, 2546.5... | 0.420498 | 0.988261 | CTL09A | 4 |
| CTL09A_5 | 373.444343 | 0.039375 | [380.20518, 567.604212, 948.6578159999999, 174... | [0.0, 8.0, 24.0, 32.0, 47.0, 56.0, 71.0] | [373.4443426152249, 511.7165300770923, 960.807... | 0.015238 | 0.985174 | CTL09A | 5 |
| H1_1 | 247.94659 | 0.02451 | [48.570755999999996, 109.48395599999999, 128.7... | [9.0, 24.0, 33.0, 48.0, 57.0, 72.0, 83.0, 96.0... | [309.14170871482054, 446.50196530762344, 556.7... | 0.841312 | 0.97756 | H1 | 1 |
| H1_2 | 1205.112667 | 0.015274 | [940.9516199999999, 1109.298648, 1189.804168, ... | [0.0, 15.0, 39.0, 47.0, 64.0, 72.0, 87.0, 95.0... | [1205.1126672840826, 1515.405420418225, 2186.4... | 0.165697 | 0.950816 | H1 | 2 |
| H9_1 | 929.110322 | 0.028899 | [143.72009999999997, 367.2745799999999, 947.59... | [0.0, 15.0, 24.0, 40.0, 48.0, 63.0, 72.0, 87.0... | [929.1103223762867, 1433.277124436774, 1859.03... | 0.595617 | 0.97611 | H9 | 1 |
| H9_2 | 209.235095 | 0.047362 | [114.86429999999999, 704.2667159999999, 1200.7... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] | [425.76448866501073, 652.0642337088078, 1326.8... | 0.245584 | 0.998409 | H9 | 2 |
| H9_3 | 2342.416999 | 0.018932 | [344.70881999999995, 429.92740799999996, 542.4... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [2342.416999320976, 2574.9795323832, 3420.6015... | 0.938483 | 0.963001 | H9 | 3 |
| H9_5 | 327.17464 | 0.028579 | [308.090865, 343.77042, 682.5535199999999, 935... | [0.0, 16.0, 17.0, 39.0, 48.0, 63.0] | [327.1746404878582, 516.8499677508379, 531.833... | 0.040654 | 0.963915 | H9 | 5 |
| KTD8.2_1 | 104.47087 | 0.03995 | [209.46329999999998, 355.660845, 580.268264999... | [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] | [104.47087006795536, 496.18993729676833, 710.8... | 0.08602 | 0.987708 | KTD8.2 | 1 |
| KTD8.2_2 | 267.491 | 0.03443 | [271.62816, 429.56363999999996, 1188.767879999... | [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... | [267.4910002980408, 448.3342142395313, 611.191... | 0.059554 | 0.983557 | KTD8.2 | 2 |
| KTD8.2_3 | 571.418893 | 0.020138 | [40.886087999999994, 148.963548, 230.079948, 3... | [9.0, 28.0, 33.0, 48.0, 57.0, 72.0, 80.0, 96.0... | [684.9629488261039, 1004.2408456227806, 1110.6... | 1.101126 | 0.984485 | KTD8.2 | 3 |
| KTD8.2_4 | 543.529985 | 0.028306 | [539.3101333333333, 824.4388333333333, 1098.55... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [543.5299854659438, 854.8923367343343, 1072.14... | 0.003265 | 0.986757 | KTD8.2 | 4 |
| UCSFi001-A_2 | 196.98018 | 0.027632 | [179.12193, 422.4456, 664.010115, 768.59238, 1... | [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... | [382.3265106939781, 490.27400788144416, 762.86... | 0.098251 | 0.969733 | UCSFi001-A | 2 |
| UCSFi001-A_3 | 372.597267 | 0.026192 | [313.51438, 394.810525, 511.56956499999995, 92... | [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0... | [372.5972665773523, 551.9061476057238, 698.619... | 0.025112 | 0.99524 | UCSFi001-A | 3 |
| UCSFi001-A_4 | 2047.342656 | 0.022357 | [250.04633999999996, 519.1565519999999, 1278.6... | [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... | [2047.3426556901313, 2448.3183682685094, 3580.... | 0.685379 | 0.979739 | UCSFi001-A | 4 |
| UCSFi001-A_5 | 343.308488 | 0.052116 | [675.93918, 889.3241639999999, 1171.628004, 18... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [343.30848825179106, 790.359528544613, 1199.20... | 0.100988 | 0.980453 | UCSFi001-A | 5 |
# Bar plot of the fitted growth rate per passage, using the r2-filtered fits.
fig, ax = plt.subplots()
sns.barplot(
    data=fitted_df_filtered,
    x='split',
    y='rate',
    # Passages are given as strings, matching the 'split' values parsed
    # from the '<line>_<split>' keys.
    order=['1', '2', '3', '4', '5', '6'],
    ax=ax,
)
ax.set_ylabel('Growth rate', fontsize=20)
ax.set_xlabel('Passage', fontsize=20)
# Resize the tick labels through tick_params: re-setting the labels with
# set_*ticklabels(get_*ticklabels()) freezes their text without fixing the
# tick positions, which can leave labels wrong or blank.
ax.tick_params(axis='both', labelsize=15)
# Growth rate against passage number, one coloured trace per cell line.
fig, ax = plt.subplots(figsize=(10, 4))
# Convert the passage to int so the x axis is numeric. `assign` returns a new
# frame, which avoids assigning a column on a filtered slice.
fitted_df_filtered = fitted_df_filtered.assign(
    split=fitted_df_filtered['split'].astype('int')
)
sns.lineplot(
    data=fitted_df_filtered,
    x='split',
    y='rate',
    hue='Line',
    ax=ax,
    palette=line_palette,
)
ax.set_ylabel('Growth rate', fontsize=20)
ax.set_xlabel('Passage', fontsize=20)
# Resize the tick labels without freezing their text (see tick_params docs);
# set_*ticklabels(get_*ticklabels()) would pin the labels to the current ticks.
ax.tick_params(axis='both', labelsize=15)
ax.legend(bbox_to_anchor=(1, 1))
<matplotlib.legend.Legend at 0x7f6c2a74f280>
# Distribution of the fitted growth rate across passages, one box per line.
fig, ax = plt.subplots()
sns.boxplot(data=fitted_df_filtered, x='Line', y='rate', ax=ax, palette=line_palette)
ax.set_ylabel('Rate of area growth', fontsize=20)
# The x axis shows the cell line (x='Line'), so label it accordingly.
ax.set_xlabel('Line', fontsize=20)
# Resize/rotate the tick labels through tick_params rather than re-setting
# the label text, which would freeze it without fixing the tick positions.
ax.tick_params(axis='y', labelsize=15)
ax.tick_params(axis='x', labelsize=15, labelrotation=90)
# Order the r2-filtered fits by increasing rate, write them to CSV and show
# the same ordered table.
fitted_sorted_by_rate = fitted_df_filtered.sort_values(by='rate')
fitted_sorted_by_rate.to_csv('../../data/csv/iPSC_fitted_exp_area_sum_per_split.csv')
fitted_sorted_by_rate
| a | rate | mean_cum | split_time | y_fitted | MSLE | r2 | Line | split | |
|---|---|---|---|---|---|---|---|---|---|
| H1_2 | 1205.112667 | 0.015274 | [940.9516199999999, 1109.298648, 1189.804168, ... | [0.0, 15.0, 39.0, 47.0, 64.0, 72.0, 87.0, 95.0... | [1205.1126672840826, 1515.405420418225, 2186.4... | 0.165697 | 0.950816 | H1 | 2 |
| H9_3 | 2342.416999 | 0.018932 | [344.70881999999995, 429.92740799999996, 542.4... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [2342.416999320976, 2574.9795323832, 3420.6015... | 0.938483 | 0.963001 | H9 | 3 |
| CTL02A_3 | 3436.282608 | 0.019792 | [627.73808, 992.027444, 1508.292068, 2111.0835... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [3436.282608164845, 3793.7273295904215, 5105.0... | 0.579326 | 0.962535 | CTL02A | 3 |
| KTD8.2_3 | 571.418893 | 0.020138 | [40.886087999999994, 148.963548, 230.079948, 3... | [9.0, 28.0, 33.0, 48.0, 57.0, 72.0, 80.0, 96.0... | [684.9629488261039, 1004.2408456227806, 1110.6... | 1.101126 | 0.984485 | KTD8.2 | 3 |
| CTL06F_3 | 2179.790117 | 0.020238 | [317.817864, 537.075576, 993.100164, 1955.1224... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [2179.7901169835372, 2411.9067318461553, 3267.... | 0.534267 | 0.985096 | CTL06F | 3 |
| CTL05A_3 | 735.796188 | 0.020412 | [544.2033449999999, 640.3876889999999, 671.980... | [0.0, 15.0, 23.0, 39.0, 47.0, 64.0, 72.0, 87.0... | [735.7961876719627, 999.3719254230257, 1176.64... | 0.164413 | 0.981179 | CTL05A | 3 |
| CTL07C_4 | 1522.709487 | 0.021389 | [182.26073999999997, 304.74484799999993, 570.8... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [1522.7094867743351, 1694.580661740262, 2335.5... | 0.829412 | 0.972545 | CTL07C | 4 |
| UCSFi001-A_4 | 2047.342656 | 0.022357 | [250.04633999999996, 519.1565519999999, 1278.6... | [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... | [2047.3426556901313, 2448.3183682685094, 3580.... | 0.685379 | 0.979739 | UCSFi001-A | 4 |
| CTL04E_3 | 539.373742 | 0.022668 | [807.9485999999999, 925.2285899999999, 1018.98... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... | [757.8076669846054, 929.3097891009512, 1305.65... | 0.064937 | 0.979539 | CTL04E | 3 |
| CTL04E_4 | 1709.286905 | 0.023495 | [463.8341, 808.1382349999999, 1096.15610299999... | [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... | [1709.2869051554621, 2062.744367831728, 3075.4... | 0.342903 | 0.97955 | CTL04E | 4 |
| CTL02A_1 | 411.679362 | 0.023629 | [405.32601, 534.56623, 880.29901, 1318.17853, ... | [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... | [1034.6010611189106, 1279.7641959460173, 1824.... | 0.233463 | 0.971479 | CTL02A | 1 |
| CTL08A_3 | 1195.731872 | 0.023814 | [140.72618999999997, 259.12742999999995, 598.6... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [1195.7318723513963, 1346.931356484199, 1925.2... | 0.872422 | 0.967654 | CTL08A | 3 |
| H1_1 | 247.94659 | 0.02451 | [48.570755999999996, 109.48395599999999, 128.7... | [9.0, 24.0, 33.0, 48.0, 57.0, 72.0, 83.0, 96.0... | [309.14170871482054, 446.50196530762344, 556.7... | 0.841312 | 0.97756 | H1 | 1 |
| CTL09A_4 | 1105.844842 | 0.026066 | [325.85663999999997, 564.9259079999999, 1003.4... | [9.0, 24.0, 32.0, 48.0, 56.0, 73.0, 81.0, 96.0... | [1398.2275206445206, 2067.206814931102, 2546.5... | 0.420498 | 0.988261 | CTL09A | 4 |
| UCSFi001-A_3 | 372.597267 | 0.026192 | [313.51438, 394.810525, 511.56956499999995, 92... | [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0... | [372.5972665773523, 551.9061476057238, 698.619... | 0.025112 | 0.99524 | UCSFi001-A | 3 |
| CTL04E_2 | 226.365471 | 0.02725 | [114.00317999999999, 419.92985999999996, 648.0... | [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... | [435.3502598087916, 556.3518577625465, 860.403... | 0.222511 | 0.980698 | CTL04E | 2 |
| UCSFi001-A_2 | 196.98018 | 0.027632 | [179.12193, 422.4456, 664.010115, 768.59238, 1... | [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... | [382.3265106939781, 490.27400788144416, 762.86... | 0.098251 | 0.969733 | UCSFi001-A | 2 |
| KTD8.2_4 | 543.529985 | 0.028306 | [539.3101333333333, 824.4388333333333, 1098.55... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [543.5299854659438, 854.8923367343343, 1072.14... | 0.003265 | 0.986757 | KTD8.2 | 4 |
| H9_5 | 327.17464 | 0.028579 | [308.090865, 343.77042, 682.5535199999999, 935... | [0.0, 16.0, 17.0, 39.0, 48.0, 63.0] | [327.1746404878582, 516.8499677508379, 531.833... | 0.040654 | 0.963915 | H9 | 5 |
| H9_1 | 929.110322 | 0.028899 | [143.72009999999997, 367.2745799999999, 947.59... | [0.0, 15.0, 24.0, 40.0, 48.0, 63.0, 72.0, 87.0... | [929.1103223762867, 1433.277124436774, 1859.03... | 0.595617 | 0.97611 | H9 | 1 |
| CTL09A_3 | 722.000766 | 0.031095 | [416.53340399999996, 845.522964, 1353.60639600... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... | [1151.079689056732, 1522.8110877598822, 2427.8... | 0.211774 | 0.977202 | CTL09A | 3 |
| CTL05A_1 | 293.207013 | 0.031328 | [296.48830799999996, 923.25726, 1722.042108, 2... | [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... | [994.9181087589576, 1318.9747614353005, 2110.1... | 0.164421 | 0.997846 | CTL05A | 1 |
| CTL06F_5 | 457.640381 | 0.031442 | [378.69431000000003, 785.843438, 1021.701998, ... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [457.64038083814654, 756.8410668189582, 973.29... | 0.007437 | 0.995491 | CTL06F | 5 |
| CTL01A_4 | 945.527198 | 0.032063 | [407.73135, 650.817798, 1470.892803, 2865.8591... | [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0] | [945.5271977458456, 1222.0016311780946, 2107.6... | 0.18731 | 0.936356 | CTL01A | 4 |
| CTL01A_2 | 44.192008 | 0.032972 | [48.98585999999999, 195.08369999999996, 285.83... | [49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 120.0] | [222.33018031900514, 289.43715927487756, 474.6... | 0.388249 | 0.951651 | CTL01A | 2 |
| CTL07C_2 | 73.894511 | 0.03302 | [37.458996, 104.79609599999999, 185.987292, 28... | [9.0, 24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0... | [99.46594192154207, 163.22177467691895, 219.70... | 0.128169 | 0.9895 | CTL07C | 2 |
| CTL08A_2 | 231.031507 | 0.034296 | [62.86313999999999, 162.41026799999997, 252.89... | [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... | [231.03150707014152, 386.44678196779114, 526.1... | 0.307433 | 0.995233 | CTL08A | 2 |
| CTL07C_3 | 685.286497 | 0.034412 | [913.3792199999999, 1250.2052039999999, 1652.8... | [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] | [685.2864972375706, 1148.2787682215853, 1565.1... | 0.013741 | 0.998142 | CTL07C | 3 |
| KTD8.2_2 | 267.491 | 0.03443 | [271.62816, 429.56363999999996, 1188.767879999... | [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... | [267.4910002980408, 448.3342142395313, 611.191... | 0.059554 | 0.983557 | KTD8.2 | 2 |
| CTL08A_1 | 346.700733 | 0.03576 | [127.16147999999998, 431.94275999999996, 1240.... | [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] | [346.7007334499812, 1398.4393021645658, 1929.3... | 0.328524 | 0.977399 | CTL08A | 1 |
| CTL09A_2 | 100.290935 | 0.036553 | [158.31083999999998, 305.1929799999999, 625.27... | [33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 12... | [335.05944132725455, 601.3324964971382, 805.58... | 0.143439 | 0.984661 | CTL09A | 2 |
| CTL06F_1 | 71.504348 | 0.038951 | [71.0148, 191.69752499999998, 376.873929, 754.... | [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.... | [71.50434825019778, 326.6395949886104, 463.783... | 0.035333 | 0.998462 | CTL06F | 1 |
| CTL09A_5 | 373.444343 | 0.039375 | [380.20518, 567.604212, 948.6578159999999, 174... | [0.0, 8.0, 24.0, 32.0, 47.0, 56.0, 71.0] | [373.4443426152249, 511.7165300770923, 960.807... | 0.015238 | 0.985174 | CTL09A | 5 |
| CTL02A_5 | 235.308712 | 0.039375 | [248.92647, 348.61422, 667.6209999999999, 927.... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [235.30871162314125, 441.8201612979932, 605.40... | 0.018336 | 0.982764 | CTL02A | 5 |
| KTD8.2_1 | 104.47087 | 0.03995 | [209.46329999999998, 355.660845, 580.268264999... | [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] | [104.47087006795536, 496.18993729676833, 710.8... | 0.08602 | 0.987708 | KTD8.2 | 1 |
| CTL05A_2 | 29.454331 | 0.040786 | [78.09385499999999, 193.287147, 241.072407, 38... | [9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0, 96.0... | [42.51742389389032, 78.39046649841391, 113.156... | 0.188402 | 0.998611 | CTL05A | 2 |
| CTL07C_6 | 498.902314 | 0.040869 | [273.1388, 520.0346, 1032.9212599999998, 2528.... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [498.90231351445, 959.4102068353367, 1330.4505... | 0.139212 | 0.967463 | CTL07C | 6 |
| CTL02A_2 | 345.17392 | 0.043891 | [481.00286399999993, 682.5998189999999, 1748.9... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.0] | [666.7525349575958, 989.739028170901, 1911.821... | 0.038571 | 0.992569 | CTL02A | 2 |
| CTL06F_2 | 86.83855 | 0.046409 | [84.18644, 202.243508, 373.50316399999997, 681... | [0.0, 9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0,... | [86.8385498371545, 131.85924957029783, 264.509... | 0.065233 | 0.998126 | CTL06F | 2 |
| H9_2 | 209.235095 | 0.047362 | [114.86429999999999, 704.2667159999999, 1200.7... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] | [425.76448866501073, 652.0642337088078, 1326.8... | 0.245584 | 0.998409 | H9 | 2 |
| UCSFi001-A_5 | 343.308488 | 0.052116 | [675.93918, 889.3241639999999, 1171.628004, 18... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [343.30848825179106, 790.359528544613, 1199.20... | 0.100988 | 0.980453 | UCSFi001-A | 5 |
# Mean fitted rate per cell line, in ascending order. This uses fitted_df,
# i.e. all fits rather than only the r2 > 0.9 subset.
fitted_df.groupby('Line')['rate'].mean().sort_values().to_frame()
| rate | |
|---|---|
| Line | |
| H1 | 0.019892 |
| CTL01A | 0.02909 |
| CTL04E | 0.029604 |
| CTL08A | 0.029916 |
| KTD8.2 | 0.030706 |
| CTL05A | 0.030842 |
| H9 | 0.030943 |
| CTL02A | 0.031672 |
| UCSFi001-A | 0.032074 |
| CTL07C | 0.032422 |
| CTL09A | 0.033272 |
| CTL06F | 0.03426 |
# Unpack the array-valued 'mean_cum' / 'split_time' cells of fitted_df into a
# long table: one row per time point, tagged with its '<line>_<split>' key.
per_split_frames = []
for line_n, sub in fitted_df.iterrows():
    n_points = len(sub['mean_cum'])
    data = pd.DataFrame({
        'mean_cum': sub['mean_cum'],
        'split_time': sub['split_time'],
        'line_n': [line_n] * n_points,
    })
    per_split_frames.append(data)
data_tot = pd.concat(per_split_frames)
# Recover the line and split identifiers from the '<line>_<split>' key.
# Positional (.values) assignment, because the concatenated index repeats.
key_parts = data_tot['line_n'].str.split('_')
data_tot['Line'] = key_parts.str[0].values
data_tot['split'] = key_parts.str[1].values
# Fit one exponential per cell line on the data pooled across its splits and
# draw each fit (curve + points) in its own panel of a 4x3 grid.
# `order` (the sequence of lines to process) is defined outside this section.
fig, ax = plt.subplots(4, 3, figsize=(20, 20))
ax = ax.flatten()
ax_index = 0
fitted_param_line = {}
for line in order:
    # Every line gets an entry; lines that are skipped or fail keep an empty one.
    fitted_param_line[line] = {}
    sub = data_tot[data_tot.Line == line]
    sub = sub.sort_values('split_time')
    # Require more than 5 distinct time points before fitting.
    if len(sub.split_time.unique()) <= 5:
        continue
    # Cumulative sum of 'mean_cum' over the pooled, time-sorted rows.
    # NOTE(review): if 'mean_cum' is already cumulative within each split,
    # this second cumsum accumulates it again -- confirm this is intended.
    y = sub['mean_cum'].cumsum().values
    x = np.array(sub.split_time)
    try:
        popt, pcov = curve_fit(exp_model, x, y, p0=(max(y), 0.1))
    except RuntimeError as e:
        # Report the line being fitted here (the previous message printed a
        # stale `line_n` left over from an earlier loop).
        print(f"Fitting failed for line {line}: {e}")
        continue
    a, b = popt
    # Generate fitted y values
    y_fitted = exp_model(x, a, b)
    fitted_param_line[line].update({
        'intercept': a,
        'rate': b,
        'mean_cum': y,
        'split_time': x,
        'y_fitted': y_fitted,
        'MSLE': mean_squared_log_error(y, y_fitted),
        'r2': r2_score(y, y_fitted),
    })
    panel = ax[ax_index]
    sns.lineplot(y=y_fitted, x=x, ax=panel, markers=True)
    sns.scatterplot(y=y, x=x, ax=panel, markers=True)
    panel.set_title(f'{line}', fontsize=30)
    panel.set_ylabel('Increase in area ', fontsize=20)
    panel.set_xlabel('Time point', fontsize=20)
    # Resize/rotate tick labels through tick_params; re-setting the label text
    # with set_*ticklabels(get_*ticklabels()) would freeze it without fixing
    # the tick positions.
    panel.tick_params(axis='x', labelsize=15, labelrotation=90)
    panel.tick_params(axis='y', labelsize=15)
    # Advance to the next panel only after a successful fit.
    ax_index += 1
plt.tight_layout()
# One row per line with its fitted parameters; lines without a fitted
# intercept are dropped, and the result is shown ordered by rate.
fitted_df_line = pd.DataFrame.from_dict(fitted_param_line).T
has_fit = fitted_df_line['intercept'].notna()
fitted_df_line = fitted_df_line[has_fit]
fitted_df_line.sort_values(by='rate')
| intercept | rate | mean_cum | split_time | y_fitted | MSLE | r2 | |
|---|---|---|---|---|---|---|---|
| KTD8.2 | 9281.067738 | 0.015832 | [209.46329999999998, 748.7734333333333, 1020.4... | [0.0, 0.0, 0.0, 9.0, 15.0, 16.0, 24.0, 24.0, 2... | [9281.067738201718, 9281.067738201718, 9281.06... | 1.173708 | 0.964499 |
| CTL01A | 8256.479812 | 0.016085 | [386.42207999999994, 794.15343, 1444.971227999... | [0.0, 0.0, 8.0, 16.0, 17.0, 24.0, 25.0, 33.0, ... | [8256.479811779696, 8256.479811779696, 9390.31... | 1.130135 | 0.805506 |
| CTL04E | 15295.49368 | 0.018186 | [463.8341, 1094.28342, 1902.4216549999999, 271... | [0.0, 0.0, 8.0, 15.0, 16.0, 24.0, 24.0, 24.0, ... | [15295.493679955274, 15295.493679955274, 17690... | 1.165687 | 0.927465 |
| CTL07C | 10371.622493 | 0.020075 | [273.1388, 455.39954, 1368.77876, 1673.523608,... | [0.0, 0.0, 0.0, 5.0, 9.0, 15.0, 16.0, 20.0, 24... | [10371.62249340715, 10371.62249340715, 10371.6... | 1.201451 | 0.983757 |
| H9 | 13110.461326 | 0.020097 | [143.72009999999997, 451.810965, 796.519785, 1... | [0.0, 0.0, 0.0, 5.0, 15.0, 15.0, 16.0, 17.0, 2... | [13110.461325598491, 13110.461325598491, 13110... | 2.169899 | 0.95942 |
| UCSFi001-A | 12323.816373 | 0.0206 | [313.51438, 563.56072, 1239.4998999999998, 175... | [0.0, 0.0, 0.0, 8.0, 15.0, 16.0, 24.0, 24.0, 2... | [12323.816373008252, 12323.816373008252, 12323... | 1.320827 | 0.946282 |
| CTL05A | 5002.928069 | 0.022183 | [544.2033449999999, 622.2971999999999, 1262.68... | [0.0, 9.0, 15.0, 23.0, 24.0, 33.0, 39.0, 39.0,... | [5002.928069044476, 6108.407285775167, 6977.99... | 0.847829 | 0.938065 |
| H1 | 2796.467958 | 0.022727 | [940.9516199999999, 989.5223759999999, 2098.82... | [0.0, 9.0, 15.0, 24.0, 33.0, 39.0, 47.0, 48.0,... | [2796.4679575700216, 3431.171930533456, 3932.4... | 0.259223 | 0.981416 |
| CTL02A | 11641.766203 | 0.023222 | [627.73808, 876.66455, 1868.6919939999998, 234... | [0.0, 0.0, 5.0, 15.0, 16.0, 20.0, 24.0, 24.0, ... | [11641.766203171926, 11641.766203171926, 13075... | 1.007616 | 0.979253 |
| CTL06F | 7817.512005 | 0.023313 | [71.0148, 449.70911, 533.89555, 851.713414, 13... | [0.0, 0.0, 0.0, 0.0, 5.0, 9.0, 16.0, 20.0, 24.... | [7817.512005475825, 7817.512005475825, 7817.51... | 1.412709 | 0.985709 |
| CTL08A | 7258.029745 | 0.02409 | [127.16147999999998, 400.55144, 463.41458, 604... | [0.0, 0.0, 0.0, 0.0, 5.0, 15.0, 16.0, 17.0, 20... | [7258.029744896524, 7258.029744896524, 7258.02... | 1.538481 | 0.97063 |
| CTL09A | 7978.400332 | 0.024632 | [380.20518, 947.8093919999999, 1273.6660319999... | [0.0, 8.0, 9.0, 15.0, 24.0, 24.0, 24.0, 32.0, ... | [7978.400331917197, 9716.224353672082, 9958.53... | 0.998389 | 0.974265 |
# Mean r2 over all per-split fits (before the r2 > 0.9 filter).
fitted_df['r2'].mean()
0.9703782325266634
# Mean r2 over the per-split fits kept by the r2 > 0.9 filter.
fitted_df_filtered['r2'].mean()
0.9803089931174649
# Mean r2 over the per-line fits.
fitted_df_line['r2'].mean()
0.9513555669643324
We use the logarithm of the area and then sum over all fields of view (FOV) at each time point, for each line and each split. We then smooth the resulting series with `gaussian_filter1d` and compute its `diff`, which corresponds to the first discrete derivative. Finally, we use `cumsum()` to obtain the cumulative sum of this discrete derivative of the growth.
# One panel per (line, split): cumulative sum of the discrete derivative of
# the Gaussian-smoothed mean area, plotted against `split_time`.
fig, ax = plt.subplots(10, 5, figsize=(50, 63), gridspec_kw={'hspace': 0.7})
ax = ax.flatten().T
ax_index = 0
for l in total_df.line.unique():
    sub = total_df.loc[total_df.line == l].sort_values(by='datetime')
    sub = sub.loc[sub.n_split != 'day']
    for split in sub.n_split.unique():
        subsub = sub.loc[sub.n_split == split]
        # `preprocess` is a helper defined outside this section; it is called
        # with final_output='mean' and its 'mean' column is used below.
        summary_df = preprocess(subsub, original_v='Area (microm2)', final_output='mean')
        summary_df = summary_df.sort_values('split_time')
        # Smooth with a Gaussian kernel (sigma = 3 samples), then differentiate
        # with respect to time.
        summary_df['smoothed'] = gaussian_filter1d(summary_df['mean'], 3)
        delta_area = summary_df['smoothed'].diff()
        delta_time = summary_df['split_time'].diff()
        summary_df['derivative'] = delta_area / delta_time
        summary_df['cumulative'] = summary_df['derivative'].cumsum()
        farray = np.array(summary_df['cumulative'])
        y_pos = summary_df.split_time.values
        panel = ax[ax_index]
        # No error values are passed, so only the markers and line are drawn.
        panel.errorbar(y_pos, farray, marker='o')
        panel.xaxis.set_tick_params(rotation=90)
        panel.set_title(f'{l}_split_{split}')
        panel.set_ylabel('Cumulative growth')
        panel.set_xlabel('Time point')
        ax_index += 1
plt.show()
# Show the columns available on total_df at this point.
total_df.columns
Index(['total_area', 'perc_area', 'mean_area_per_colony', 'n_colonies',
'time_point', 'confluency/generation', 'hour', 'month', 'day', 'line',
'datetime', 'norm_factor', 'perc_area_norm', 'err_bar_mean', 'n_split',
'split_time', 'pixel_size', 'Area (microm2)', 'Area (mm2)',
'line_split', 'mean_area_tp', 'area_error', 'std', 'cv', 'line_n_split',
'logArea'],
dtype='object')
# Overlay the cumulative-growth curve of every (line, split) pair on one axis
# and keep each pair's summary table in `cumulative_dict_dfs`.
sns.set(style="white", palette="Paired", color_codes=True)
fig, ax = plt.subplots(figsize=(7, 5))
labels, lc, handles = [], [], []
all_lines = {}
total_df_no_first = total_df[total_df.n_split != '1'].copy()
cumulative_dict_dfs = {}
for l in total_df.line.unique():
    color = line_palette[l]
    sub = total_df[total_df.line == l].sort_values(by='datetime')
    sub = sub[sub.n_split != 'day']
    for split in sub.n_split.unique():
        subsub = sub[sub.n_split == split]
        # Nothing to plot when the split has no time point at all.
        if len(subsub.split_time.unique()) == 0:
            continue
        # `preprocess` is a helper defined outside this section; it is called
        # with final_output='mean' and its 'mean' column is used below.
        summary_df = preprocess(subsub, original_v='Area (microm2)', final_output='mean')
        summary_df = summary_df.sort_values('split_time')
        # Smooth (sigma = 3 samples), differentiate with respect to time, then
        # accumulate the derivative.
        summary_df['smoothed'] = gaussian_filter1d(summary_df['mean'], 3)
        delta_area = summary_df['smoothed'].diff()
        delta_time = summary_df['split_time'].diff()
        summary_df['derivative'] = delta_area / delta_time
        summary_df['cumulative'] = summary_df['derivative'].cumsum()
        cumulative_dict_dfs[f'{l}_{split}'] = summary_df
        farray = np.array(summary_df['cumulative'])
        y_pos = summary_df.split_time.values
        line, = ax.plot(y_pos, farray, color=color, marker='.')
        ax.xaxis.set_tick_params(rotation=90)
        # Keep a handle on every curve, keyed by '<line>_<split>'.
        all_lines[f'{l}_{split}'] = line
        labels.append(l)
        handles.append(line)
        lc.append(color)
plt.legend(handles, labels, bbox_to_anchor=(1, 1))
<matplotlib.legend.Legend at 0x7f6c28e8b400>
# Hand the curves drawn above to the project helper `highlight_growth_curves`
# (defined outside this section), then export the figure.
lines = total_df.line.unique()
highlight_growth_curves(
    all_lines,
    xlabel='Hours from split',
    ylabel='Cumulative growth',
    lines=lines,
    fontsize=20,
)
# NOTE(review): an earlier savefig call in this file writes to this same path,
# so this export replaces that figure -- confirm intended.
plt.savefig('./figures/cumulative_growth_curve_per_line.svg', dpi=300)
Here we fit a linear regression model for each line, pooling the cumulative sums of all of its splits. We take the slope of the fit as the growth rate of the line.
# Stack the per-(line, split) summary tables. After reset_index the
# '<line>_<split>' key is in 'level_0' and the original row label in 'level_1'.
split_keys = list(cumulative_dict_dfs.keys())
split_frames = list(cumulative_dict_dfs.values())
cumulative_df = pd.concat(split_frames, keys=split_keys).reset_index()
# The line name is the part of the key before the first underscore.
cumulative_df['line'] = cumulative_df['level_0'].str.split('_').str[0]
cumulative_df
| level_0 | level_1 | time_point | mean | stds | hour | month | day | datetime | split_time | smoothed | derivative | cumulative | line | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | H1_1 | 0 | 06_11_23_t18 | 48570.756 | 30156.872900 | 18 | 11 | 06 | 2023-11-06 18:00:00 | 0.0 | 110143.004205 | NaN | NaN | H1 |
| 1 | H1_1 | 2 | 07_11_23_t9 | 60913.200 | 94781.769346 | 9 | 11 | 07 | 2023-11-07 09:00:00 | 15.0 | 137570.951804 | 1828.529840 | 1828.529840 | H1 |
| 2 | H1_1 | 1 | 07_11_23_t18 | 19238.925 | 12090.104228 | 18 | 11 | 07 | 2023-11-07 18:00:00 | 24.0 | 195181.480282 | 6401.169831 | 8229.699671 | H1 |
| 3 | H1_1 | 4 | 08_11_23_t9 | 72999.585 | 102548.695788 | 9 | 11 | 08 | 2023-11-08 09:00:00 | 39.0 | 287232.524109 | 6136.736255 | 14366.435926 | H1 |
| 4 | H1_1 | 3 | 08_11_23_t18 | 55094.292 | 61100.627807 | 18 | 11 | 08 | 2023-11-08 18:00:00 | 48.0 | 417456.373402 | 14469.316588 | 28835.752514 | H1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 459 | CTL01A_5 | 3 | 20_11_23_t9 | 188056.464 | 63817.297072 | 9 | 11 | 20 | 2023-11-20 09:00:00 | 23.0 | 425051.456538 | 3051.706842 | 6177.820375 | CTL01A |
| 460 | CTL01A_5 | 2 | 20_11_23_t18 | 633256.125 | 453126.246308 | 18 | 11 | 20 | 2023-11-20 18:00:00 | 32.0 | 483465.005972 | 6490.394382 | 12668.214756 | CTL01A |
| 461 | CTL01A_5 | 5 | 21_11_23_t9 | 568493.484 | 354817.532310 | 9 | 11 | 21 | 2023-11-21 09:00:00 | 47.0 | 543394.794597 | 3995.319242 | 16663.533998 | CTL01A |
| 462 | CTL01A_5 | 4 | 21_11_23_t17 | 386422.080 | NaN | 17 | 11 | 21 | 2023-11-21 17:00:00 | 55.0 | 592580.870876 | 6148.259535 | 22811.793533 | CTL01A |
| 463 | CTL01A_5 | 6 | 22_11_23_t10 | 1151778.360 | NaN | 10 | 11 | 22 | 2023-11-22 10:00:00 | 72.0 | 620324.506463 | 1631.978564 | 24443.772097 | CTL01A |
464 rows × 14 columns
# Build a grid with one panel per cell line, wrapping after 4 columns.
cumulative_by_time = cumulative_df.sort_values(by='split_time')
grid = sns.FacetGrid(
    cumulative_by_time,
    col="line",
    palette=line_palette,
    col_wrap=4,
    height=5,
)
# Scatter plus first-order (linear) regression of cumulative growth on time.
grid.map(sns.regplot, "split_time", "cumulative", order=1)
# Adjust the arrangement of the plots
grid.fig.tight_layout(w_pad=1)
# Linear regression of cumulative growth on time since split, one model per
# line, pooling the rows of all of its splits.
fitted_model = {}
for l in cumulative_df.line.unique():
    sub = cumulative_df[cumulative_df.line == l].sort_values(by='datetime')
    key = f'{l}'
    # Missing cumulative values are replaced by 0 before the regression.
    slope, intercept, rvalue, pvalue, stderr = stats.linregress(
        sub['split_time'], sub['cumulative'].fillna(0)
    )
    fitted_model[key] = {
        'slope': slope,
        'intercept': intercept,
        'rvalue': rvalue,
        'pvalue': pvalue,
        'stderr': stderr,
    }
# One row per line; the dictionary key ends up in the 'index' column.
fitted_model_df = pd.DataFrame.from_dict(fitted_model).T.reset_index()
fitted_model_df['line'] = fitted_model_df['index'].str.split('_').str[0]
fitted_model_df.sort_values(by='slope')
| index | slope | intercept | rvalue | pvalue | stderr | line | |
|---|---|---|---|---|---|---|---|
| 11 | CTL01A | 644.317934 | -2403.923983 | 0.658753 | 3.427561e-04 | 153.439932 | CTL01A |
| 0 | H1 | 1092.216426 | -17870.728308 | 0.929554 | 3.246271e-13 | 83.369973 | H1 |
| 1 | CTL04E | 1957.784502 | -21455.175368 | 0.849852 | 4.029196e-12 | 196.950623 | CTL04E |
| 3 | CTL05A | 2002.574201 | -24668.872554 | 0.858594 | 5.454580e-12 | 199.285868 | CTL05A |
| 5 | KTD8.2 | 2123.269235 | -46580.571519 | 0.961890 | 3.878081e-24 | 95.435362 | KTD8.2 |
| 10 | UCSFi001-A | 2410.553184 | -34882.929624 | 0.873562 | 1.020601e-14 | 207.238467 | UCSFi001-A |
| 4 | H9 | 3810.734488 | -62725.125730 | 0.940884 | 5.551798e-19 | 225.539637 | H9 |
| 9 | CTL07C | 4069.466071 | -98092.360468 | 0.872476 | 2.224665e-13 | 369.740303 | CTL07C |
| 8 | CTL08A | 4263.987240 | -102963.382905 | 0.888082 | 9.120149e-16 | 340.563762 | CTL08A |
| 7 | CTL06F | 4575.687405 | -109378.476161 | 0.905129 | 3.365469e-17 | 331.626382 | CTL06F |
| 6 | CTL09A | 5028.233033 | -88363.366680 | 0.895983 | 1.307039e-14 | 409.717545 | CTL09A |
| 2 | CTL02A | 5231.682227 | -91740.048754 | 0.858920 | 1.340509e-12 | 506.011614 | CTL02A |
# Mean correlation coefficient (rvalue) over the per-line linear fits.
fitted_model_df['rvalue'].mean()
0.8744732639082722
# Export the per-line linear-regression results, ordered by increasing slope.
fitted_model_sorted = fitted_model_df.sort_values(by='slope')
fitted_model_sorted.to_csv('../../data/csv/iPSC_fitted_lm_grouped.csv')